{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import json\n",
    "import subprocess\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from PIL import Image\n",
    "from sklearn.model_selection import StratifiedKFold, StratifiedShuffleSplit\n",
    "\n",
    "import env\n",
    "from utils import KaggleCameraDataset, progress_iter\n",
    "\n",
    "%matplotlib inline\n",
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### useful routines"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def jpg_quality(filepath):\n",
    "    bash_cmd = \"identify -verbose {0} | grep Quality\".format(filepath)\n",
    "    process = subprocess.Popen(bash_cmd.split(), stdout=subprocess.PIPE)\n",
    "    output, error = process.communicate()\n",
    "    output = filter(lambda s: 'Quality' in s, output.split('\\n'))\n",
    "    if not output: \n",
    "        return None\n",
    "    return int(filter(lambda c: c.isdigit(), output[0]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[(0, 'HTC-1-M7'),\n",
       " (1, 'LG-Nexus-5x'),\n",
       " (2, 'Motorola-Droid-Maxx'),\n",
       " (3, 'Motorola-Nexus-6'),\n",
       " (4, 'Motorola-X'),\n",
       " (5, 'Samsung-Galaxy-Note3'),\n",
       " (6, 'Samsung-Galaxy-S4'),\n",
       " (7, 'Sony-NEX-7'),\n",
       " (8, 'iPhone-4s'),\n",
       " (9, 'iPhone-6')]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "list(enumerate(KaggleCameraDataset.target_labels()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[True, True, True, True, True, False, False, False, False, False]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "KaggleCameraDataset.is_rotation_allowed()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def resolutions(): # with all possible rotations\n",
    "    return [\n",
    "        [(1520, 2688), (2688, 1520)], # 0\n",
    "        [(3024, 4032), (4032, 3024)], # 1\n",
    "        [(2432, 4320), (4320, 2432)], # 2\n",
    "        [(1040, 780), (3088, 4130), (4130, 3088), (3120, 4160), (4160, 3120)], # 3\n",
    "        [(3120, 4160), (4160, 3120), (4160, 2340)], # 4\n",
    "        [(4128, 2322)], # 5\n",
    "        [(4128, 2322)], # 6\n",
    "        [(6000, 4000)], # 7\n",
    "        [(3264, 2448)], # 8\n",
    "        [(3264, 2448)], # 9\n",
    "    ]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(3120, 4160) in resolutions()[3]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(4000, 6000) in resolutions()[7]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## load all the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "train_data = KaggleCameraDataset('../data/', train=True)\n",
    "glebs_train_data = KaggleCameraDataset('../data/glebs-train/', train=True)\n",
    "glebs_val_data = KaggleCameraDataset('../data/glebs-val/', train=True)\n",
    "alex_firsov_data = KaggleCameraDataset('../data/alex-firsov/', train=True)\n",
    "esato_data = KaggleCameraDataset('../data/esato-com/', train=True)\n",
    "artgor_data = KaggleCameraDataset('../data/artgor/', train=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## check quality of original training data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "train_d = dict.fromkeys(train_data.X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "for k in progress_iter(sorted(train_d), True):\n",
    "    train_d[k] = jpg_quality(k)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "9f007f61e2674cff96fba0252f6af15b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "A Jupyter Widget"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "train_stats = []\n",
    "for x, y in progress_iter(zip(train_data.X, train_data.y), True):\n",
    "    train_stats.append((x, y, jpg_quality(x)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "with open('../data/orig_stats.json', 'w') as f:\n",
    "    json.dump(train_stats, f, indent=4, sort_keys=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "92"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "min(filter(bool, train_d.values()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([  1.00000000e+00,   1.00000000e+01,   0.00000000e+00,\n",
       "          0.00000000e+00,   0.00000000e+00,   1.64900000e+03,\n",
       "          8.33000000e+02,   0.00000000e+00,   0.00000000e+00,\n",
       "          2.57000000e+02]),\n",
       " array([ 92. ,  92.6,  93.2,  93.8,  94.4,  95. ,  95.6,  96.2,  96.8,\n",
       "         97.4,  98. ]),\n",
       " <a list of 10 Patch objects>)"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAEACAYAAAC6d6FnAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFspJREFUeJzt3X+wZGV95/H3ByZYxsQBdGXWGX7ID5WYIBpB1s1qR0QH\nUhGTXRNMWSCykYgaohsj6JZziUkWqFgqayXsliOBVBB/7SruEkFWei1q+aGFBAwDjBuFmSGMpTJU\nNCsFzHf/6DNj2/adudN97z09c96vqltz+ttPn36e6b7nc85zuu9JVSFJ6p792u6AJKkdBoAkdZQB\nIEkdZQBIUkcZAJLUUQaAJHXUbgMgyfokW5PcNVR7YZJbknw9ye1JThi677IkG5PcmeT4ofpZSe5P\ncl+SMxd/KJKkPbGQI4ArgNeM1C4F1lXVi4B1zW2SnAYcVVXHAOcClzf1g4D3AycALwXWJVm5KCOQ\nJE1ktwFQVTcDj4yUtwM7NuAHAlua5dcCVzWPuw1YmeQQBgFyQ1U9WlXbgBuAtdN3X5I0qRUTPu6d\nwPVJPggEeFlTXw1sGmq3uamN1rc0NUlSSyY9CfxW4PyqOoxBGHy8qWekXYAaU6epS5JaMukRwFlV\ndT5AVX0mycea+mbg0KF2a4CHmnpvpH7TuBUnMRgkaQJVNW5ne14LPQIIP7kXvyXJKwCSnAxsbOrX\nAmc29ZOAbVW1FbgeOCXJyuaE8ClNbb5B7LM/69ata70Pjs/xdXF8+/LYqibbb97tEUCSqxnsvT8j\nyYMMPvXzu8BlSfYHfgS8pdlwX5fktCTfBH4InN3UH0nyAeBrDKZ+LqrByWBJUkt2GwBV9Tvz3PWS\nedq/fZ76XwF/tdCOSZKWlt8EXma9Xq/tLiwpx7d325fHty+PbVKZdO5oqSSpWeuTJM26JNQSnQSW\nJO1jDABJ6igDQJI6ygCQpI4yACSpowwASeooA0CSOsoAkKSOMgAkqaMMAEnqKANAkjrKAJCkjjIA\npD2watURJGn1Z9WqI9r+b9A+wr8GKu2BZMdlrlvtxcRXgNK+a0n+GmiS9Um2JrlrpP6OJPcmuTvJ\nxUP1C5NsTLIhyauH6mub9vcnec+edFKStPh2ewSQ5FeAHwBXVdVxTa0HvBc4raqeSPLMqvpukmOB\nq4ETGFz4/UbgGAbXE74fOJnBReK/CpxRVfeOeT6PADSzPALQrJrkCGAhl4S8OcnhI+W3AhdX1RNN\nm+829dOBa5r6t5NsBE5kEAAbq+qBpqPXNG1/KgAkSctj0pPAzwVenuTWJDcl+eWmvhrYNNRuS1Mb\nrW9uapKkluz2CGAXjzuwqk5KcgLwaeBIBnv6o4rxQTPvMezc3NzO5V6v57U8JWlEv9+n3+9PtY4F\nfQqomQL6wtA5gOsYTAF9pbm9ETgJ+F2Aqrq4qX8RWMcgGOaqam1Tv2DQrC4Z81yeA9DM8hyAZtVS\nXhM4/OTe/ecYnNAlyXOBA6rqe8C1wG8nOSDJc4CjgdsZnPQ9OsnhSQ4AzmjaSpJastspoCRXAz3g\nGUkeZLBH/3HgiiR3A48BZwJU1T1JPgXcAzwOnNfszj+Z5O3ADQxCZ31VbViC8UiSFsgvgkl7wCkg\nzaqlnAKSJO1jDABJ6igDQJI6ygCQpI4yACSpowwASeooA0CSOsoAkKSOMgAkqaMMAEnqKANAkjrK\nAJCkjjIAJKmjDABJ6igDQJI6ygCQpI7abQAkWZ9ka5K7xtz3h0m2Jzl4qHZZko1J7kxy/FD9rCT3\nJ7kvyZmLNwRJ0iQWcgRwBfCa0WKSNcCrgAeGaqcCR1XVMcC5wOVN/SDg/cAJwEuBdUlWTt17SdLE\ndhsAVXUz8MiYuz4EvHukdjpwVfO424CVSQ5hECA3VNWjVbWNwbWB107TcUnSdCY6B5Dk14FNVXX3\nyF2rgU1Dtzc3tdH6lqYmSWrJij19QJ
KnAu8DThl395jbNaYOu7iy9tzc3M7lXq9Hr9fb025K0j6t\n3+/T7/enWkeq5t0O/7hRcjjwhao6LskvAjcC/8xgw76GwR79icAfAzdV1Sebx90LvAL4VaBXVb/X\n1C8fbjfyXLWQPkltSHbs07TaC/wd0agkVNW4ne15LXQKKM0PVfWNqlpVVUdW1XMYTPO8qKq+A1wL\nnNl05iRgW1VtBa4HTkmysjkhfEpTkyS1ZCEfA70a+D/Ac5M8mOTskSY7p3iq6jrgW0m+CfwX4Lym\n/gjwAeBrwG3ARc3JYElSSxY0BbScnALSLHMKSLNqKaeAJEn7GANAkjrKAJCkjjIAJKmjDABJ6igD\nQJI6ygCQpI4yACSpowwASeooA0CSOsoAkKSOMgAkqaMMAEnqKANAkjrKAJCkjjIAJKmjFnJFsPVJ\ntia5a6h2aZINSe5M8tkkTx+678IkG5v7Xz1UX5vk3iT3J3nP4g9FkrQnFnIEcAXwmpHaDcALqup4\nYCNwIUCSXwB+CzgWOBX4iwzsB3y0Wc8LgDckef7iDEGSNIndBkBV3Qw8MlK7saq2NzdvBdY0y68F\nrqmqJ6rq2wzC4cTmZ2NVPVBVjwPXAKcvzhAkSZNYjHMAbwaua5ZXA5uG7tvS1Ebrm5uaJKklK6Z5\ncJL3AY9X1Sd2lMY0K8YHzbxXtZ6bm9u53Ov16PV6k3dSkvZB/X6ffr8/1TpSNe92+MeNksOBL1TV\ncUO1s4C3AK+sqsea2gVAVdUlze0vAusYBMNcVa0d127kuWohfZLakIRd7LssVy/wd0SjklBV43bC\n57XQKaAwtHefZC3wR8Brd2z8G9cCZyQ5IMlzgKOB24GvAkcnOTzJAcAZTVtJUkt2OwWU5GqgBzwj\nyYMM9ujfCxwAfGmwR8StVXVeVd2T5FPAPcDjwHnN7vyTSd7O4NND+wHrq2rDUgxIkrQwC5oCWk5O\nAWmWOQWkWbWUU0CSpH2MASBJHWUASFJHGQCS1FEGgCR1lAEgSR1lAEhSRxkAktRRBoAkdZQBIEkd\nZQBIUkcZAJLUUQaAJHWUASBJHWUASFJH7TYAkqxPsjXJXUO1g5LckOS+JNcnWTl032VJNia5M8nx\nQ/WzktzfPObMxR+KJGlPLOQI4ArgNSO1C4Abq+p5wJeBCwGSnAocVVXHAOcClzf1g4D3AycALwXW\nDYeGJGn57TYAqupm4JGR8unAlc3ylc3tHfWrmsfdBqxMcgiDALmhqh6tqm0MLg25dvruS5ImNek5\ngGdV1VaAqnoYeFZTXw1sGmq3uamN1rc0NUlSSxb7JPDo9Sh3XEB13HUqvaipJLVoxYSP25rkkKra\nmmQV8J2mvhk4dKjdGuChpt4bqd8038rn5uZ2Lvd6PXq93nxNJamT+v0+/X5/qnWkavc74kmOAL5Q\nVb/U3L4E+H5VXZLkAuDAqrogyWnA26rq15KcBHy4qk5qTgJ/DXgxg6OOrwG/3JwPGH2uWkifpDYk\nOw5qW+0F/o5oVBKqatxsy7x2ewSQ5GoGe+/PSPIgsA64GPh0kjcDDwKvB6iq65KcluSbwA+Bs5v6\nI0k+wGDDX8BF4zb+kqTls6AjgOXkEYBmmUcAmlWTHAH4TWBJ6igDQJI6ygCQpI4yACSpowwASeoo\nA0CSOsoAkKSOMgAkqaMMAEnqKANAkjrKAJCkjjIAJKmjDABJ6igDQJI6ygCQpI4yACSpo6YKgCTv\nTPKNJHcl+ZskByQ5IsmtSe5L8okkK5q2ByS5JsnGJLckOWxxhiBJmsTEAZDk2cA7gBdX1XEMLi/5\nBuAS4INV9TxgG3BO85BzGFxH+Bjgw8Cl03Rc6q6nkKT1n1Wrjmj7P0JTmnYKaH/gac1e/lOBh4Bf\nBT7b3H8l8Lpm+fTmNsBngJOnfG6pox5jcFnKdn+2bn1gyUeqpTVxAFTVQ8AHGVwUfgvwKHAHsK2q\ntj
fNNgOrm+XVwKbmsU8C25IcPOnzS5KmM80U0IEM9uoPB54NPA04dUzTHVevHr1Y8SxcXVuSOmvF\nFI99FfAPVfV9gCT/HXgZcGCS/ZqjgDUMpoVgcDRwKPBQkv2Bp1fVI+NWPDc3t3O51+vR6/Wm6KYk\n7Xv6/T79fn+qdaRqsp3wJCcC64ETGExKXgF8FXg58N+q6pNJ/hL4u6q6PMl5wC9W1XlJzgBeV1Vn\njFlvTdonaakls3DgOgt9AAj+rs6OJFTV6EzLrh8zzQuYZB1wBvA48HXg3zPY678GOKipvbGqHk/y\nFOCvgRcB3wPOqKpvj1mnAaCZZQAMMwBmybIHwFIwADTLDIBhBsAsmSQA/CawJHWUASBJHWUASFJH\nGQCS1FEGgCR1lAEgSR1lAEhSRxkAktRRBoAkdZQBIEkdZQBIUkcZAJLUUQaAJHWUASBJHWUASFJH\nGQCS1FFTBUCSlUk+nWRDkr9P8tIkByW5Icl9Sa5PsnKo/WVJNia5M8nx03dfkjSpaY8APgJcV1XH\nAi8E7gUuAG6squcBXwYuBEhyKnBUVR0DnAtcPuVzS5KmMM1F4X8euLOqjhqp3wu8oqq2JlkF3FRV\nxya5vFn+ZNNuA9Crqq0jj/eSkJpZXhJymJeEnCXLfUnII4HvJrkiyR1J/muSnwUO2bFRr6qHgWc1\n7VcDm4Yev6WpSZJasGLKx74YeFtVfS3JhxhM/8y3SzAumca2nZub27nc6/Xo9XpTdFOS9j39fp9+\nvz/VOqaZAjoEuKWqjmxu/wqDADiKZmpnN1NAO6eKRtbrFJBmllNAw5wCmiXLOgXUbLg3JXluUzoZ\n+HvgWuBNTe1NwOeb5WuBM5uOngRsG934S5KWz8RHAABJXgh8DPgZ4B+As4H9gU8BhwIPAq+vqm1N\n+48Ca4EfAmdX1R1j1ukRgGaWRwDDPAKYJZMcAUwVAEvBANAsMwCGGQCzZLk/BSRJ2osZAJLUUQaA\nJHWUASBJHWUASFJHGQCS1FEGgCR1lAEgSR1lAEhSRxkAktRRBoAkdZQBIEkdZQBIUkcZAJLUUQaA\nJHWUASBJHTV1ACTZL8kdSa5tbh+R5NYk9yX5RJIVTf2AJNck2ZjkliSHTfvckqTJLcYRwPnAPUO3\nLwE+WFXPA7YB5zT1c4DvV9UxwIeBSxfhuSVJE5oqAJKsAU5jcF3gHV4JfLZZvhJ4XbN8enMb4DMM\nLiIvSWrJtEcAHwLeTXOB0iTPAB6pqu3N/ZuB1c3yamATQFU9CWxLcvCUzy9JmtCKSR+Y5NeArVV1\nZ5LejnLzM6yG7vuJVTDPla3n5uZ2Lvd6PXq93rhmktRZ/X6ffr8/1TpSNXYbvPsHJn8GvBF4Angq\n8PPA54BXA6uqanuSk4B1VXVqki82y7cl2R/4x6p61pj11qR9kpZaMu9+y3L2Ygb6ABD8XZ0dSaiq\n0R3tXZp4Cqiq3ltVh1XVkcAZwJer6o3ATcDrm2ZnAZ9vlq9tbtPc/+VJn1uSNL2l+B7ABcC7ktwP\nHAysb+rrgWcm2Qj8QdNOktSSiaeAlopTQJplTgENcwpolizrFJAkae9mAEhSRxkAktRRBoAkdZQB\nIEkdZQBIUkcZAJLUUQaAJHWUASBJHWUASFJHGQCS1FEGgCR1lAEgSR018RXBJEkDq1YdwdatD7Td\njT3mn4OW9oB/DnqYfw56h1l5X/jnoCVJCzJxACRZk+TLSe5JcneS32/qByW5Icl9Sa5PsnLoMZcl\n2ZjkziTHL8YAJEmTmeYI4AngXVX1C8C/At6W5PkMLvV4Y1U9j8F1fy8ESHIqcFRVHQOcC1w+Vc8l\nSVOZ5qLwD1fVnc3yD4ANwBrgdODKptmVzW2af69q2t8GrExyyKTPL0mazqKcA0hyBHA8cCtwSFVt\nhUFIAM9qmq0GNg09bEtTkyS1YOqPgSb5OeAzwPlV9YMk850KH3d2
emzbubm5ncu9Xo9erzdlLyVp\nX9NvfiY31cdAk6wA/gfwt1X1kaa2AehV1dYkq4CbqurYJJc3y59s2t0LvGLH0cLQOv0YqGbWrHzc\nr/0+gB8D/bFZeV8s98dAPw7cs2Pj37gWeFOz/Cbg80P1MwGSnARsG934S5KWz8RHAEn+NfAV4G4G\n0VfAe4HbgU8BhwIPAq+vqm3NYz4KrAV+CJxdVXeMWa9HAJpZs7Kn134fwCOAH5uV98WeHgH4TWBp\nD8zKL3r7fQAD4Mdm5X3hN4ElSQtiAEhSRxkAktRRBoAkdZQBIEkdZQBIUkcZAJLUUQaAJHWUASBJ\nHWUASFJHGQCS1FEGgCR1lAEgSR1lAEhSRxkAktRRyx4ASdYmuTfJ/Unes9zPL0kaWNYASLIf8FHg\nNcALgDckef5y9qFt/X6/7S4sqX19fPu+ftsdWDK+N3/ach8BnAhsrKoHqupx4Brg9GXuQ6v29Tfh\nvj6+fV+/7Q4sGd+bP225A2A1sGno9uamJklaZiuW+fnGXa/ypy6kObi+ZrsOPvjZfP/7Dy3Jui+6\n6KIFtTvkkMN5+OFvL0kfJGlZLwqf5CRgrqrWNrcvAKqqLhlq0/aVlSVpr7SnF4Vf7gDYH7gPOBn4\nR+B24A1VtWHZOiFJApZ5CqiqnkzyduAGBucf1rvxl6R2LOsRgCRpdrT+TeAk5ye5u/n5/aZ2aZIN\nSe5M8tkkT2+7n5OaZ3x/nOTvknw9yReTrGq7n5MYN7ah+/4wyfYkB7fVv2nN89qtS7I5yR3Nz9q2\n+zmpkfGdP1R/R/NlzbuTXNxmH6cxz+t3zdBr960kd7Tdz0nNM74XJrml2bbcnuQlu1xJVbX2w+DL\nYHcBTwH2ZzA1dBTwKmC/ps3FwH9qs59LML6fG2rzDuAv2+7rIoztS8BRzX1rgC8C3wIObruvi/ja\nHQ2sA97Vdv+WaHxHAb1meUXT7plt93WRxrfz/TnU5s+B/9h2Xxfx9TsauB54ddPmVOCmXa2n7SOA\nY4Fbq+qxqnoS+ArwG1V1Y1Vtb9rcymCDsjeab3w/GGrzNGD72EfPttGx/W/gN5r7PgS8u7WeLY6x\nr11zX/ufU57euPH9JvBW4OKqegKgqr7bYh+nsav35w6/BXxi2Xu2OOZ7f24HVjZtDgS27GolbQfA\nN4CXJzkoyc8CpwGHjrR5M/C3y96zxTHv+JL8SZIHgd8B3t9iHyc1dmxJfh3YXFV3t9u9qY0b3xoG\n31t5WzM9+bEkK3e5ltk133vzmKZ+a5KbdjuFMLt2uW1J8m+Ah6vq/7bVwSnN9/58J/DnzbblUuDC\nXa2k9ZPASc4G3g78E3AP8P+q6j80970PeHFV/dsWuziVXY2vuf89wFOraq6dHk5uzNh+BLwMOKWq\n/inJt4CXVNX3WuzmxMa9dgymJL9bVZXkT4B/WVXntNjNic3z+p0C/K+q+oMkJwCfrKojW+zmxHaz\nbfkLBn+W5kMtdnEq87w/VzCY9vlckn8HnFtVp8y7jrYDYFiSPwU2VdXlSc4C3gK8sqoea7lri2J4\nfEO1w4D/WVW/1F7PpteM7WHgfcA/M5gmWcPgEPTEqvpOi92b2jyv3eHAF6rquPZ6tjh2jA94LYMp\noK809W8CL91bQ3yHkW3L/gzely+uqqX5uv8ya8a3GfizqjpoqP5oVc17lNr2FBBJ/kXz72EM5rA+\n0Xyy4o+A1+7tG/95xnf0UJPTgb3yuxBjxnZVVa2qqiOr6jkM3pAv2ls3/vO8dsOf2PpNBofie6Vx\n4wM+z+CLmiR5LvAze+vGf57xweAoZ8PevvEfM76rgYeSvKKpnwzcv6t1LPffAhrns81HBR8Hzquq\nR5P8Z+AA4EsZ/F2gW6vqvDY7OYVx41vf/HJtBx4Afq/VHk7up8Y2cn+xd58wHfveTHI8g9fu28C5\nbXZwSuPG93Hg40nuBh4Dzmy1
h9OZ7/352+y9J3+HjXv93gJ8pDnK+RGDWZR5zdQUkCRp+bQ+BSRJ\naocBIEkdZQBIUkcZAJLUUQaAJHWUASBJHWUASFJHGQCS1FH/H9KTd4CiVSgUAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x7f90191a94d0>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.hist(filter(bool, train_d.values()))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## collect stats about rest of the data\n",
    "### 1) discard broken images, 2) check resolution, and store JPG quality"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def make_stats(dataset):\n",
    "    x_fpaths = dataset.X\n",
    "    stats = {}\n",
    "    for i in progress_iter(range(len(dataset)), verbose=True):\n",
    "        try:\n",
    "            x, y = dataset[i]\n",
    "            x = np.asarray(x, dtype=np.uint8)\n",
    "            if len(x.shape) != 3 or x.shape[0] < 1024 or x.shape[1] < 1024:\n",
    "                continue\n",
    "            d = {}\n",
    "            d['x_fpath'] = x_fpaths[i]\n",
    "            d['y'] = y\n",
    "            d['res'] = (x.shape[0], x.shape[1])\n",
    "            d['res_valid'] = d['res'] in resolutions()[y]\n",
    "            d['quality'] = jpg_quality(x_fpaths[i])\n",
    "            stats[i] = d\n",
    "        except:\n",
    "            pass\n",
    "    return stats"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### alex firsov's"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "50d5c0e55cd74495a1085ad89385d7da",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "A Jupyter Widget"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "alex_firsov_stats = make_stats(alex_firsov_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "with open('../data/alex_firsov_stats.json', 'w') as f:\n",
    "    json.dump(alex_firsov_stats, f, indent=4, sort_keys=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "85"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "min(map(lambda d: d['quality'], alex_firsov_stats.values()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([   3.,    0.,    0.,    2.,    0.,    3.,    3.,  332.,    0.,    1.]),\n",
       " array([ 85. ,  86.4,  87.8,  89.2,  90.6,  92. ,  93.4,  94.8,  96.2,\n",
       "         97.6,  99. ]),\n",
       " <a list of 10 Patch objects>)"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAEACAYAAABfxaZOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFChJREFUeJzt3X+wZGV95/H3BwZQtBxBhdkwCATRkBh2MCsS3VrbwkJI\n1WZcq8iaH6u4uuUmmlDsZnVIqjJXy9SiK27iWhZ/qOyQCgFWIyBRQISOZUoGIzNhYADHVWTAzOxu\n4hiBCgXMd//oM9BM7p3bc2/37Xvneb+quu7p5zznnO/t2+fTTz/dfTtVhSSpDYdNuwBJ0tIx9CWp\nIYa+JDXE0Jekhhj6ktQQQ1+SGjJv6Cc5KsnmJFuSbEuysWu/Isn3uva7kpwxtM0nk+xIsjXJukn+\nApKk0a2ar0NVPZHkTVX1eJLDgb9KclO3+ner6s+H+yc5Hzi1qk5L8jrgcuDssVcuSTpoI03vVNXj\n3eJRDB4o9nbXM0v39cCV3XabgdVJjl9knZKkMRgp9JMclmQLsAv4alV9q1v1kW4K57IkR3RtJwA7\nhzZ/pGuTJE3ZqCP9vVV1JrAWOCvJzwIbqup04LXAS4APdt1nG/37vx4kaRmYd05/WFX9Q5K/BM6r\nqk90bU8muQL4z123h4EThzZbC/xw/30l8YFAkhagqmYbXI9klHfvvDTJ6m75+cCbgfuTrOnaArwV\nuKfb5AbgHd26s4E9VbV7jsKX/WXjxo1Tr8E6rXOl1mid478s1igj/X8GbEpyGIMHiWuq6stJvpbk\npQymc7YC/7EL8i8n+aUk3wUeA9616ColSWMxyls2twGvmaX9nANs8/5F1iVJmgA/kTuPXq837RJG\nYp3jtRLqXAk1gnUuNxnHHNGCDpzUtI4tSStVEmqSL+RKkg4dhr4kNcTQl6SGGPqS1BBDX5IaYuhL\nDVuz5mSSLOllzZqTp/1rN823bEoNG/wXlaU+DzOWfyfQKt+yKUkamaEvSQ0x9CWpIYa+JDXE0Jek\nhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUkHlDP8lRSTYn2ZJkW5KN\nXfvJSe5I8kCSP0uyqms/MsnVSXYk+WaSl0/6l5AkjWbe0K+qJ4A3VdWZwDrg/CSvAz4KXFZVrwL2\nAO/uNnk38PdVdRrwR8DHJlK5JOmgjTS9U1WPd4tHAasYfOvCm4AvdO2bgLd2y+u76wCfB84ZS6WS\npEUbKfSTHJZkC7AL+Crwv4E9VbW36/IwcEK3fAKwE6Cqngb2JDl2rFVLkhZk1SidunA/M8mLgC8C\np8/Wrfu5/9d4zfl9bDMzM88s93o9er3eKOVIUjP6/T79fn9s+zvo78hN8gfA48AHgDVVtTfJ2cDG\nqjo/yU3d8uYkhwN/W1XHzbIfvyNXmjK/I3flmfh35CZ5aZLV3fLzgTcD24HbgQu6bu8Eru+Wb+iu\n062/baHFSZLGa96RfpKfZ/DC7GHd5Zqq+sMkpwBXA8cAW4DfqKonkxwF/AlwJvB3wNur6sFZ9utI\nX5oyR/orz2JH+gc9vTMuhr40fYb+yjPx6R1J0qHD0Jekhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kN\nMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1BBD\nX5IaYuhLUkMMfUlqiKEvSQ2ZN/STrE1yW5LtSbYl+e2ufWOSh5Pc1V3OG9rmkiQ7ktyX5NxJ/gKS\npNGlqg7cIVkDrKmqrUleCHwbWA/8W+AnVfWJ/fqfDlwFvBZYC9wKnFb7HSjJ/k2SllgSYKnPw+C5\nv3BJqKosdPt5R/pVtauqtnbLjwL3ASfsO/4sm6wHrq6qp6rqQWAHcNZCC5Qkjc9BzeknORlYB2zu\nmt6XZGuSzyRZ3bWdAOwc2uwRnn2QkC
RN0apRO3ZTO58HLqqqR5N8GvhwVVWSjwCXAe9h9tH/rM/l\nZmZmnlnu9Xr0er3RK5ekBvT7ffr9/tj2N++cPkCSVcCNwFeq6o9nWX8S8KWqOiPJBqCq6qPdupuA\njVW1eb9tnNOXpsw5/ZVn4nP6nc8B24cDv3uBd5+3Afd0yzcAb09yZJJTgFcAdy60QEnS+Mw7vZPk\nDcCvA9uSbGEwLPg94NeSrAP2Ag8C7wWoqu1JrgW2A08Cv+WQXpKWh5GmdyZyYKd3pKlzemflWarp\nHUnSIcDQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLo\nS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDZk39JOsTXJb\nku1JtiX5na79mCS3JHkgyc1JVg9t88kkO5JsTbJukr+AJGl0o4z0nwL+U1X9LPCLwPuS/AywAbi1\nql4F3AZcApDkfODUqjoNeC9w+UQqlyQdtHlDv6p2VdXWbvlR4D5gLbAe2NR129Rdp/t5Zdd/M7A6\nyfFjrluStAAHNaef5GRgHXAHcHxV7YbBAwNwXNftBGDn0GaPdG2SpClbNWrHJC8EPg9cVFWPJqm5\nus7SNmvfmZmZZ5Z7vR69Xm/UciSpCf1+n36/P7b9pWqu7B7qlKwCbgS+UlV/3LXdB/SqaneSNcDt\nVXV6ksu75Wu6fvcDb9z3rGBonzXKsSVNThLmGJNN8qh47i9cEqpqtsH1SEad3vkcsH1f4HduAC7s\nli8Erh9qf0dX3NnAnv0DX5I0HfOO9JO8Afg6sI3BkKCA3wPuBK4FTgQeAi6oqj3dNp8CzgMeA95V\nVXfNsl9H+tKUOdJfeRY70h9pemcSDH1p+gz9lWeppnckSYcAQ1+SGmLoS1JDDH1JaoihL0kNMfQl\nqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1BBDX5Ia\nYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDVk3tBP8tkku5PcPdS2McnDSe7qLucNrbskyY4k9yU5d1KF\nS5IO3igj/SuAt8zS/omqek13uQkgyenArwCnA+cDn06SsVUrSVqUeUO/qr4B/GiWVbOF+Xrg6qp6\nqqoeBHYAZy2qQknS2CxmTv99SbYm+UyS1V3bCcDOoT6PdG2SpGVg1QK3+zTw4aqqJB8BLgPew+yj\n/5prJzMzM88s93o9er3eAsuRpENTv9+n3++PbX+pmjOTn+2UnAR8qarOONC6JBuAqqqPdutuAjZW\n1eZZtqtRji1pcgYvuS31eRg89xcuCVW14NdKR53eCUOj+CRrhta9DbinW74BeHuSI5OcArwCuHOh\nxUmSxmve6Z0kVwE94CVJHgI2Am9Ksg7YCzwIvBegqrYnuRbYDjwJ/JbDeUlaPkaa3pnIgZ3ekabO\n6Z2VZ6mmdyRJhwBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JD\nDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4kNWTe\n0E/y2SS7k9w91HZMkluSPJDk5iSrh9Z9MsmOJFuTrJtU4ZKkgzfKSP8K4C37tW0Abq2qVwG3AZcA\nJDkfOLWqTgPeC1w+xlolSYs0b+hX1TeAH+3XvB7Y1C1v6q7va7+y224zsDrJ8eMpVZK0WAud0z+u\nqnYDVNUu4Liu/QRg51C/R7o2SdIysGrM+8ssbTVX55mZmWeWe70evV5vzOVI0srW7/fp9/tj21+q\n5szkZzslJwFfqqozuuv3Ab2q2p1kDXB7VZ2e5PJu+Zqu3/3AG/c9K9hvnzXKsSVNThIOMC6b1FHx\n3F
+4JFTVbAPskYw6vROeO4q/AbiwW74QuH6o/R1dYWcDe2YLfEnSdMw70k9yFdADXgLsBjYC1wH/\nCzgReAi4oKr2dP0/BZwHPAa8q6rummO/jvSlKXOkv/IsdqQ/0vTOJBj60vQZ+ivPUk3vSJIOAYa+\nJDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtS\nQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqyKrFbJzkQeDHwF7gyao6\nK8kxwDXAScCDwK9U1Y8XWackaQwWO9LfC/Sq6syqOqtr2wDcWlWvAm4DLlnkMSRJY7LY0M8s+1gP\nbOqWNwFvXeQxJEljstjQL+DmJN9K8p6u7fiq2g1QVbuAly3yGJKkMVnUnD7w+qraleRlwC1JHmDw\nQDCSmZmZZ5Z7vR69Xm+R5UjSoaXf79Pv98e2v1SNnNEH3lGyEXgUeA+Def7dSdYAt1fV6bP0r3Ed\nW9LCJOEgxmnjOiqe+wuXhKrKQrdf8PROkqOTvLBbfgFwLrANuAG4sOv2TuD6hR5DkjReCx7pJzkF\n+CKDYcIq4E+r6tIkxwLXAicCDwEXVNWeWbZ3pC9NmSP9lWexI/2xTe8c9IENfWnqDP2VZ2rTO5Kk\nlcfQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JD\nDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWrIxEI/yXlJ7k/ynSQfnNRx\nJEmjm0joJzkM+BTwFuDngF9N8jOTONak9fv9aZcwEuscr5VQ50qocaA/7QJGsnJuz8VZNaH9ngXs\nqKofACS5GlgP3D+h47Fz5042bPgwTz+9d6z7veeeu3j1q18z5/p1617Nhg0Xj/WYC9Hv9+n1etMu\nY17WOT4rocaBPtCbcg3zWzm35+JMKvRPAHYOXX+YwQPBxHzta1/jC1+4lyeeePeY9/x/uffe18+x\n7ifceONHlkXoa7w+/vE/4kMf+tCSHvOww45m797HD2qbpa5RK9+kQj+ztNWEjgXAEUccQfIQL3rR\ndWPd7z/+4wM873mzl171BIcffsRYj7ecrVlzMrt3/2DR+zmYoDr++JPYtevBRR/zYD322I+Z8F32\nn9i7Nwd5zJnushiznaqHplHuv+N+EJ3W/fdAUjX+O3aSs4GZqjqvu74BqKr66FCfpT2jJOkQUVUL\nfrSeVOgfDjwAnAP8LXAn8KtVdd/YDyZJGtlEpneq6ukk7wduYfAOoc8a+JI0fRMZ6UuSlqcl+0Ru\nkouT3JPk7iR/muTIoXX/I8lPlqqWucxVY5I/TPJAknu7ZzDLrs4k5yT5dpItSb6e5KeXQZ0XJdnW\nXX6nazsmyS3d7XlzktXLtM6PJbkvydYkX0jyouVY59C6302yN8mx06pvqJbhOi8aav/t7gOb25Jc\nuoxq3Pc3/+dJvtmdQ3cm+RdTqu2zSXYnuXuobc7zJsknk+zo7qvr5j1AVU38AvwU8D3gyO76NcA7\nuuVfAK4E/mEpajnYGoELgf851O+ly7DOdzJ4DeWVXdtvAp+bcp0/B9wNHAUczmCq7xXAR4EPdH0+\nCFy6DOs8FXgzcFjX51Lgvy6zOr8KnNqtWwvcBHwfOHaZ1bnv9ux1y6u6flM7jw5w37wZOLfrcz5w\n+5Tq+5fAOuDuobZZz5uuzr/oll8H3DHf/pfyf+8cDrwgySrgaOCH3Sd3/xvwX5awjgP5JzUyCNAP\n7+tQVf9vSrUNG67z+cAjwF7gxd361Qxqn6bTGdwBn6iqp4GvA/8G+GVgU9dnE/DWKdW3z6x1VtWt\nVbXvk353MAjWadq/zr9kcHsC/HeWzzk02+35Ngbn0aVV9RRM/Tya
6765l8G5A4Nz6ZFpFFdV3wB+\ntF/zep573qwfar+y224zsDrJ8Qfa/5KEflX9ELgMeIjBDbmnqm4F3g9cV1W7mfIbhg9Q46nA25N8\nK8lfJHnFMqvzx12d/wH4cpKHgN9gMDqdpnuAf9U9LT0a+CXgROD47u9NVe0CXjbFGmHuOof9e+Ar\nS17Zc81aZ5J/DTxcVdumW94z5ro9T+va70hy+7SmTg5Q41rgYuDj3Tn0MeCSKda4v+P2O2+O69r3\n/yDsI13bnJYk9JO8mMEj0kkMpidekOTfARcw+B89UzdHjb/O4Cng41X1WuAzwOemV+UB67wYOK+q\nXg5cwWD0NzVVdT+Dp6S3Al8GtgJPTbOm2cxXZ5LfB56sqqumU+HAHHU+Dfw+8AdDXac9eJrr9jwC\neHFVnQ18ALh2Gdb4m8BF3Tl0MVM+10d00B+EXarpnTcD36uqv++eTn0R+BCDUfR3k3wfODrJd5ao\nnlFrfD2DR9E/B6iqLwJnTK9EYPY63wCcUVV/3fW5FvjFaRW4T1VdUVW/UFU9Bk9XvwPs3vf0M8ka\n4P9MsURg1jp3ACR5J4NR4K9NsbxnzFLn94GTgb/pzqG1wLeTHDf3XiZvjr/78Hn0LWBvkpcsoxq/\ny+B1xuu69Z9nwv865iDNdd48zHOfma5lnqndpQr9h4CzkzwvSRh8aOvjVfVTVfXTVXUKg9H0K5eo\nnlFr3A5c1y2TpMfgBdNpmq3OexnM5Z3W9TkXmPrnIpK8rPv5cgZzpn8G3MDgxXEYvAB9/VSKGzJb\nnUnOYzAi/eWqemKa9e0zS51XVtWaoXPoYeDMqprqA+kcf/frefY8eiVwRFX93TKq8SoGrzO+sWs/\nh8GD1bSE547ih8+bC3n2vLmBwRtO9v0nhD37poHmtISvSG9kEER3M3gh4oj91k/13Ttz1cjghZ0b\nu7a/An5+mda5vru+BbgNOHkZ1Pl1BvOnW4Be13Ysg6fVDzB4B8qLl2mdO4AfAHd1l08vxzr3W/89\npvzunQPcnkcAfwJsA/4aeOMyrPENXW1bgG8yeACdRm1XMRitP8FgkPcu4Ji5zhsGU+TfBf4GeM18\n+/fDWZLUEL8uUZIaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktSQ/w8VEH9r1lam7QAA\nAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x7f11511a4950>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.hist(map(lambda d: d['quality'], alex_firsov_stats.values()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sum(map(lambda d: d['res_valid'], alex_firsov_stats.values()))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### gleb's train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6e73589b6cd74eeba93fe51e83d9bd9a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "A Jupyter Widget"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python2.7/dist-packages/PIL/TiffImagePlugin.py:742: UserWarning: Corrupt EXIF data.  Expecting to read 12 bytes but only got 7. \n",
      "  warnings.warn(str(msg))\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "glebs_train_stats = make_stats(glebs_train_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1691 {'y': 6, 'x_fpath': '../data/glebs-train/train/Samsung-Galaxy-S4/24750425488_00b9cac713_o.jpg', 'res_valid': False, 'quality': 85, 'res': (2322, 4128)}\n"
     ]
    }
   ],
   "source": [
    "for i in sorted(glebs_train_stats):\n",
    "    if glebs_train_stats[i]['x_fpath'] == '../data/glebs-train/train/Samsung-Galaxy-S4/24750425488_00b9cac713_o.jpg':\n",
    "        print i, glebs_train_stats[i]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "11004"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(glebs_train_stats)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "with open('../data/glebs_train_stats.json', 'w') as f:\n",
    "    json.dump(glebs_train_stats, f, indent=4, sort_keys=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "45"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "min(map(lambda d: d['quality'], glebs_train_stats.values()))  # !!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2664"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sum(map(lambda d: d['res_valid'], glebs_train_stats.values()))  # ~25%"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([  1.00000000e+00,   0.00000000e+00,   0.00000000e+00,\n",
       "          0.00000000e+00,   4.00000000e+00,   7.00000000e+00,\n",
       "          1.75000000e+02,   5.52000000e+02,   1.71600000e+03,\n",
       "          8.54900000e+03]),\n",
       " array([  45. ,   50.5,   56. ,   61.5,   67. ,   72.5,   78. ,   83.5,\n",
       "          89. ,   94.5,  100. ]),\n",
       " <a list of 10 Patch objects>)"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAEACAYAAABRQBpkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFzhJREFUeJzt3X+s3XWd5/Hnq1QdcdxLcaVdWxRFZBk3Y0WljWac448K\nhQl1kkEhmZQiJibqQCabiWVmY2/DbLKYEJW4LjHDMsUoiDIO/YNsrwQ67iYCVqiglFJ1tb+WC0rb\nHdnE8OO9f3w/tx4ut/S0vfTce3k+kpP7/b7v53vO95PTntf9fr7f7/mkqpAkvbzNG/YOSJKGzzCQ\nJBkGkiTDQJKEYSBJwjCQJDFgGCS5MslD7XFFqy1IMpZke5JNSUb62l+XZEeSrUmW9tUvTfJo22b1\n9HdHknQ0DhsGSd4OXA68G1gK/FmStwJrgTur6kzgLuCq1n4lcHpVnQF8Cri+1RcAnwfeAywD1vUH\niCRpeAY5MjgLuKeqfldVzwLfB/4cuBDY0NpsAFa15VXATQBVdS8wkmQhcC4wVlUHqmo/MAacN209\nkSQdtUHC4CfA+9uw0InA+cCpwMKqGgeoqseAU1r7xcCuvu13t9rk+p5WkyQN2fzDNaiqR5JcA9wJ\n/CuwFXjmRTbJFOs1RZ1WlyQN2WHDAKCqbgRuBEjyn+n+wh9PsrCqxpMsAh5vzXfTHTlMWALsbfXe\npPrdk18riQEhSUehqqb6o3sgg15N9Pr284105wtuBjYCa1qTNcDtbXkjsLq1Xw7sb8NJm4AVSUba\nyeQVrfYCVTVnH+vWrRv6Ptg/+/dy7N9c7lvVsf8NPdCRAXBbkpOBp4FPV9WBNnR0a5JPADuBi9oH\n+R1Jzk/yM+Ap4LJW35fkamAL3fDQ+upOJEuShmzQYaL3T1F7EvjwIdp/9hD1fwT+cfDdkyQdD96B\nfJz1er1h78JLyv7NbnO5f3O5b9Mh0zHWNJ2S1EzbJ0ma6ZJQL/UJZEnS3GYYSJIMA0mSYSBJwjCQ\nJDH4TWeSpMNYtOg0xsd/NezdOCpeWipJ0ySZ+F7Ooby6l5ZKko6NYSBJMgwkSYaBJAnDQJKEYSBJ\nwjCQJDH4tJd/neQnSR5M8o0kr0xyWpJ7kmxPcnOS+a3tK5PckmRHkh+0qTInnueqVt+W5CMvVack\nSUfmsGGQ5A3AXwFnV9Uf0921fAlwDXBtVZ0J7Acub5tcDjxZVWcAXwK+0J7nj4CPAWcBK4GvprtD\nQ5I0ZIMOE50AvKb99f9qYC/wAeC29vsNwEfb8qq2DvAd4INt+ULglqp6pqp+CewAzjmmvZckTYvD\nhkFV7QWupZv0fg9wALgf2F9Vz7Vmu4HFbXkxsKtt+yxwIMnJ/fVmT982kqQhOuwX1SU5ie6v/TfR\nBcG36YZ5Jpv4Qo6phn7qReovMDo6enC51+s5d6kkvcDm9pgeg3xr6YeBX1TVkwBJvgu8Fzgpybx2\ndLCEbugIuqOEU4G9SU4ARqpqX5KJ+oT+bZ6nPwwkSVPptceE9cf0bIOcM9gJLE/yB+2E74eAnwJ3\nAxe1NpcCt7fljW2d9vu7+uoXt6uN3gy8FbjvmPZekjQtBvoK6yTrgIuBp4EHgE/S/WV/C7Cg1f6y\nqp5O8irg68A7gd8AF7cTxiS5iu5qo6eBK6tqbIrX8iusJc1Ks/krrJ3PQJKmyWwOA+9AliQZBpIk\nw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkBgiD\nJG9L8kCS+9vPA0muSLIgyViS7Uk2JRnp2+a6JDuSbE2ytK9+aZJH2zarX6pOSZKOzBHNdJZkHt2E\n98uAzwK/qaovJPkcsKCq1iZZCXy2qi5Isgz4clUtT7IA2AKcDQT4EXB2VR2Y9BrOdCZpVno5zXT2\nYeDnVbULWAVsaPUNbZ328yaAqroXGE
myEDgXGKuqA1W1HxgDzjvaHZckTZ8jDYOPA99sywurahyg\nqh4DTmn1xcCuvm12t9rk+p5WkyQN2fxBGyZ5BXAh8LlWOtSx0OTDlInjpqkOX6Z8jtHR0YPLvV6P\nXq836G5K0svE5vaYHgOHAbAS+FFV/bqtjydZWFXjSRYBj7f6buDUvu2WAHtbvTepfvdUL9QfBpKk\nqfR4/kfq+mN6tiMZJroEuLlvfSOwpi2vAW7vq68GSLIc2N+GkzYBK5KMtJPJK1pNkjRkA11NlOTV\nwE7gLVX1r612MnAr3VHATuCidmKYJF+hOzn8FHBZVd3f6muAv6MbHvr7qrppitfyaiJJs9Jsvpro\niC4tPR4MA0mz1WwOA+9AliQZBpIkw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRh\nIEnCMJAkYRhIkjAMJEkYBpIkBgyDNlXlt5NsS/LTJMuSLEgylmR7kk1JRvraX5dkR5KtSZb21S9N\n8mjbZvVL0SFJ0pEb9Mjgy8AdVXUW8A7gEWAtcGdVnQncBVwFkGQlcHpVnQF8Cri+1RcAnwfeAywD\n1vUHiCRpeA4bBkleC/xJVd0IUFXPVNUBYBWwoTXb0NZpP29qbe8FRpIsBM4FxqrqQJsreYxunmRJ\n0pANcmTwFuDXSW5Mcn+SryU5EVhYVeMAVfUYcEprvxjY1bf97labXN/TapKkIZs/YJuzgc9U1ZYk\nX6QbIjrUrM+TJ2SemCF6qomap3yO0dHRg8u9Xo9erzfAbkrSy8nm9pgeg4TBbmBXVW1p67fRhcF4\nkoVVNZ5kEfB4X/tT+7ZfAuxt9d6k+t1TvWB/GEiSptLj+R+p64/p2Q47TNSGgnYleVsrfQj4KbAR\nWNNqa4Db2/JGYDVAkuXA/vYcm4AV7cqkBcCKVpMkDdkgRwYAVwDfSPIK4BfAZcAJwK1JPgHsBC4C\nqKo7kpyf5GfAU60tVbUvydXAFrrhofXtRLIkachSdaih/+FIUjNtnyRpEMnEKdKhvDpVNdW52YF4\nB7IkyTCQJBkGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJ\nYsAwSPLLJD9O8kCS+1ptQZKxJNuTbEoy0tf+uiQ7kmxNsrSvfmmSR9s2q6e/O5KkozHokcFzQK+q\n3llV57TaWuDOqjoTuAu4CiDJSuD0qjoD+BRwfasvAD4PvAdYBqzrDxBJ0vAMGgaZou0qYENb3tDW\nJ+o3AVTVvcBIkoXAucBYVR1o012OAecdw75LkqbJoGFQwKYkP0zyyVZb2Ca6p6oeA05p9cXArr5t\nd7fa5PqeVpMkDdn8Adu9t6oeS/J6YCzJdg490efkOTgnJgWdam5OJzuWpBlgoDBof/lTVU8k+Wfg\nHGA8ycKqGk+yCHi8Nd8NnNq3+RJgb6v3JtXvnur1RkdHDy73ej16vd5UzSTpZWxze0yPVL34H+dJ\nTgTmVdVvk7yGbqx/PfAh4MmquibJWuCkqlqb5HzgM1V1QZLlwJeqank7gbwFOJtueGoL8K52/qD/\n9epw+yRJM1EyMRAylFenqqYagRnIIEcGC4HvJqnW/htVNZZkC3Brkk8AO4GLAKrqjiTnJ/kZ8BRw\nWavvS3I1XQgUsH5yEEiShuOwRwbHm0cGkmar2Xxk4B3IkiTDQJJkGEiSMAwkSRgGkiQMA0kShoEk\nCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJLEEYRBknlJ7k+ysa2fluSeJNuT3Jxk\nfqu/MsktSXYk+UGSN/Y9x1Wtvi3JR6a/O5Kko3EkRwZXAg/3rV8DXFtVZwL7gctb/XK6uZHPAL4E\nfAEgyR8BHwPOAlYCX003LZAkacgGCoMkS4DzgX/oK38QuK0tbwA+2pZXtXWA77R2ABcCt1TVM1X1\nS2
AHcM5R77kkadoMemTwReBvaJN7JnkdsK+qnmu/3w0sbsuLgV0AVfUscCDJyf31Zk/fNpKkITps\nGCS5ABivqq3AxLBO+pYnVN/vJqsXqUuShmz+AG3eB1yY5Hzg1cBr6c4FjCSZ144OlgB7W/vdwKnA\n3iQnACNVtS/JRH1C/zbPMzo6enC51+vR6/WOpE+S9DKwuT2mR6oG/+M8yZ8C/7GqLkzyLeCfqupb\nSf4b8OOquj7Jp4H/UFWfTnIx8NGquridQP4GsIxueOh7wBk1aQeSTC5J0qzQXRMzrM+vUFVHfVHO\nIEcGh7IWuCXJ1cADwA2tfgPw9SQ7gN8AFwNU1cNJbqW7Iulp4NN+6kvSzHBERwbHg0cGkmar2Xxk\n4B3IkiTDQJJkGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaB\nJAnDQJLEAGGQ5FVJ7k3yQJKHkqxr9dOS3JNke5Kbk8xv9VcmuSXJjiQ/SPLGvue6qtW3JfnIS9ct\nSdKROGwYVNXvgA9U1TuBpcDKJMuAa4Brq+pMYD9wedvkcuDJqjoD+BLwBYA2B/LHgLOAlcBX000L\nJEkasoGGiarq/7XFV9HNm1zAB4DbWn0D8NG2vKqtA3wH+GBbvhC4paqeqapfAjuAc45l5yVJ02Og\nMEgyL8kDwGPA94CfA/ur6rnWZDewuC0vBnYBVNWzwIEkJ/fXmz1920iShmj+II3ah/47k/wb4Lt0\nQz0vaNZ+TjX0Uy9Sf4HR0dGDy71ej16vN8huStLLyOb2mB4DhcGEqvq/Sf4FWA6clGReC4olwN7W\nbDdwKrA3yQnASFXtSzJRn9C/zfP0h4EkaSq99piw/piebZCrif5tkpG2/Grgw8DDwN3ARa3ZpcDt\nbXljW6f9/q6++sXtaqM3A28F7jumvZckTYtBjgz+HbAhyTy68PhWVd2RZBtwS5KrgQeAG1r7G4Cv\nJ9kB/Aa4GKCqHk5yK12QPA18uqqmHCaSJB1fmWmfx0nMCEmzUne1/LA+v0JVHfXl+t6BLEkyDCRJ\nhoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJDDbt\n5ZIkdyV5OMlDSa5o9QVJxpJsT7JpYmrM9rvrkuxIsjXJ0r76pUkebdusfmm6JEk6Uoed6SzJImBR\nVW1N8ofAj4BVwGXAb6rqC0k+ByyoqrVJVgKfraoLkiwDvlxVy5MsALYAZwNpz3N2VR2Y9HrOdCZp\nVprTM51V1WNVtbUt/xbYBiyhC4QNrdmGtk77eVNrfy8wkmQhcC4wVlUHqmo/MAacd7Q7LkmaPkd0\nziDJacBS4B5gYVWNQxcYwCmt2WJgV99mu1ttcn1Pq0mShmz+oA3bENF3gCur6rdJDnUsNPkwZeK4\naarDlymfY3R09OByr9ej1+sNupuS9DKxuT2mx0BhkGQ+XRB8vapub+XxJAurarydV3i81XcDp/Zt\nvgTY2+q9SfW7p3q9/jCQJE2lx/M/Utcf07MNOkz034GHq+rLfbWNwJq2vAa4va++GiDJcmB/G07a\nBKxIMtJOJq9oNUnSkA1yNdH7gO8DD9EN6xTwt8B9wK10RwE7gYvaiWGSfIXu5PBTwGVVdX+rrwH+\nrj3H31fVTVO8nlcTSZqVZvPVRIcNg+PNMJA0W83mMPAOZEmSYSBJOoJLSyVptli06DTGx3817N2Y\nVTxnIGnOGd7YvecMJEmzmGEgSTIMJEmGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEk\niQHCIMkNScaTPNhXW5BkLMn2JJuSjPT97rokO5JsTbK0r35pkkfbNqunvyuSpKM1yJHBjcC5k2pr\ngTur6kzgLuAqgCQrgdOr6gzgU8D1rb4A+DzwHmAZsK4/QCRJw3XY
MKiq/wXsm1ReBWxoyxva+kT9\nprbdvcBIkoV0YTJWVQfaPMljdHMkS5JmgKM9Z3BKVY0DVNVjwCmtvhjY1ddud6tNru9pNUnSDDDd\nM51NnlhhYqaHqSZcOOQMEKOjoweXe70evV5vGnZNkuaSze0xPY42DMaTLKyq8SSLgMdbfTdwal+7\nJcDeVu9Nqt99qCfvDwNJ0lR6PP9jdf0xPdugw0Th+X/dbwTWtOU1wO199dUASZYD+9tw0iZgRZKR\ndjJ5RatJkmaAwx4ZJPkmXfy8LslOYB3wX4BvJ/kEsBO4CKCq7khyfpKfAU8Bl7X6viRXA1vohofW\ntxPJkqQZIDNt8vkkNdP2SdLskgxrYvphvW732lU11fnZgXgHsiTJMJAkGQaSJAwDSRKGgSQJw0CS\nxPR/HYUkAbBo0WmMj/9q2LuhAXmfgaSXxPCu9YfhXe/vfQaSpFnMMJAkGQaSJMNAkoRhIEnCMJAk\nYRhIkhhCGCQ5L8kjSR5N8rnj/frSy8miRaeRZCgPzS7HNQySzAO+ApwLvB24JMm/P577MGybN28e\n9i68pOzfzNLdAVxH8Lj7CNu/2GOm2TzsHZjRjveRwTnAjqr6VVU9DdwCrDrO+zBUs+3D5EjZv9lu\n87B34CW0edg7MKMd7zBYDOzqW9/dapKkITreYTDVQOLQjiefeOKJ4z6Oun79+oPL27ZtG1bXJel5\njusX1SVZDoxW1XltfS1QVXVNX5uZONgoSTPesXxR3fEOgxOA7cCHgP8D3AdcUlX+iSxJQ3Rc5zOo\nqmeTfBYYoxuiusEgkKThm3HzGUiSjr8ZcQdyknlJ7k+ysa2fluSeJNuT3Jxk1s7IluSXSX6c5IEk\n97XagiRjrX+bkowMez+PRpKRJN9Osi3JT5Msm0N9e1t7z+5vPw8kuWKu9A8gyV8n+UmSB5N8I8kr\n59j/vSuTPNQeV7TarH3/ktyQZDzJg321Q/YnyXVJdiTZmmTp4Z5/RoQBcCXwcN/6NcC1VXUmsB+4\nfCh7NT2eA3pV9c6qOqfV1gJ3tv7dBVw1tL07Nl8G7qiqs4B3AI8wR/pWVY+29+xs4F3AU8B3mSP9\nS/IG4K+As6vqj+mGjC9hjvzfS/J2un1/N7AU+LMkb2V2v3830t2w22/K/iRZCZxeVWcAnwKuP+yz\nV9VQH8AS4HtAD9jYak8A89rycuB/DHs/j6F//xt43aTaI8DCtrwIeGTY+3kU/Xot8PMp6rO+b1P0\n6SPA/5xL/QPeAPwKWEAXBBuBFcDjc+H/HvAXwNf61v8T8DfAttn8/gFvAh7sW5/873FbW74e+Hhf\nu4P9PtRjJhwZfJHuTSqAJK8D9lXVc+33u+n+4c5WBWxK8sMkn2y1hVU1DlBVjwGvH9reHb23AL9O\ncmMbSvlakhOZG32b7OPAN9vynOhfVe0FrgV2AnuAA8D9wP458n/vJ8D72zDKicD5wKnMkfevzymT\n+nNKq0++wXcPh7nBd6hhkOQCYLyqtvL7G9LCC29Om81nud9bVe+m+8f4mSR/wuzuz4T5wNnAf61u\nKOUpukPWudC3g5K8ArgQ+HYrzYn+JTmJ7qtg3kT3gf8aYOUUTWdlf6vqEbohrzuBO4CtwDND3anj\n64hv8B32kcH7gAuT/AK4Gfgg8CVgpH2pHXTDSHuHtH/HrKU1VfUE8M903880nmQhQJJFdIfms81u\nYFdVbWnrt9GFw1zoW7+VwI+q6tdtfa7078PAL6rqyap6lu58yHuBk+bQ/70bq+pdVdUD9gGPMnfe\nvwmH6s9uuiOhCYd9L4caBlX1t1X1xqp6C3AxcFdV/SXdVyde1JpdCtw+rH08FklOTPKHbfk1dGPP\nD9GNz65pzWZl/9qh6a4kb2ulDwE/ZQ70bZJL6P5QmTBX+rcTWJ7kD5KE379/c+L/HkCS17efbwT+\nnO59nO3v3+SRk/7+rOH3/dkI
rIaD3/ywf2I46ZCGfUKk7wTHn/L7E8hvBu6lS/JvAa8Y9v4dZZ/e\nTHd4+gBdCKxt9ZPpDl+30508P2nY+3qU/XsH8MPWx38CRuZK31r/Xk13McNr+2pzqX/r6E4sPghs\nAF4xV/7vtf59n+7cwQN0V/TN6veP7rzVXuB3dGF+Gd0FAFP2h266gJ8BP6a7auxFn9+bziRJQz9n\nIEmaAQwDSZJhIEkyDCRJGAaSJAwDSRKGgSQJw0CSBPx/C6YF+11elA8AAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x7f115112df10>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Histogram of the non-empty 'quality' values in Gleb's train stats.\n",
    "# A generator expression is used so no loop variable is left behind in the notebook namespace.\n",
    "plt.hist(list(s['quality'] for s in glebs_train_stats.values() if s['quality']))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### gleb's val"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "dec0db8332cd483ead43936f1a10f258",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "A Jupyter Widget"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "# Compute the stats mapping for glebs_val_data (make_stats is defined outside this excerpt).\n",
    "# Later cells read the 'quality' and 'res_valid' fields of each entry.\n",
    "glebs_val_stats = make_stats(glebs_val_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Save the validation stats as JSON; indent=4 and sort_keys=True keep the file readable and its key order fixed.\n",
    "with open('../data/glebs_val_stats.json', 'w') as f:\n",
    "    json.dump(glebs_val_stats, f, indent=4, sort_keys=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(92, 333)"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# (lowest non-empty quality, number of entries that have a non-empty quality)\n",
    "min(s['quality'] for s in glebs_val_stats.values() if s['quality']), sum(1 for s in glebs_val_stats.values() if s['quality'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "133"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Total of the 'res_valid' field over all entries (a count, if the field is a boolean flag - TODO confirm).\n",
    "sum(s['res_valid'] for s in glebs_val_stats.values())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([   6.,    2.,    0.,  190.,    0.,   69.,    0.,    9.,   56.,    1.]),\n",
       " array([  92. ,   92.8,   93.6,   94.4,   95.2,   96. ,   96.8,   97.6,\n",
       "          98.4,   99.2,  100. ]),\n",
       " <a list of 10 Patch objects>)"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAEACAYAAABfxaZOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAD/lJREFUeJzt3X+MZWV9x/H3B1dNEFmRyk4KCBZRaaNdm4bS1upUrQKJ\nbrUBapsoEiJVVKJpFarprsamYqyNP6IkCnZpRMXadjG1sBIYG2350SBlDQtiK/LD7pg2gEGtXdhv\n/7hn17vrnZ3LzJ05d/d5v5KTPfOc597z5Zy5n3nuc8+5pKqQJLXhkL4LkCStHkNfkhpi6EtSQwx9\nSWqIoS9JDTH0Jakhi4Z+kmOSXJfk9iTbkry1az8iydYkdya5Jsnaocd8JMldSW5Nsn4l/wMkSeMb\nZ6T/CPD2qvpF4NeB85M8B7gQuLaqng1cB1wEkOQ04ISqOhE4D7hkRSqXJD1mi4Z+Ve2oqlu79YeB\n7cAxwAZgc9dtc/cz3b+Xd/1vBNYmWTfhuiVJS/CY5vSTHA+sB24A1lXVPAz+MABHdd2OBu4detj9\nXZskqWdjh36Sw4C/BS7oRvwLfX9DRrT5XQ+SNAXWjNMpyRoGgf83VbWla55Psq6q5pPMAN/v2u8D\njh16+DHA90Y8p38IJGkJqmrU4Hos4470LwNur6oPD7VdBZzdrZ8NbBlqfy1AklOAB3dPA+2rqqZ+\n2bhxY+81WKd1Hqg1Wufkl+VadKSf5DeBPwS2JfkGg6maPwUuBq5Mcg5wD3BGF+RfTnJ6km8DPwRe\nv+wqJUkTsWjoV9XXgcctsPmlCzzmzcspSpK0MrwjdxGzs7N9lzAW65ysA6HOA6FGsM5pk0nMES1p\nx0n1tW9JOlAloVbhg1xJ0kHA0Jekhhj6ktQQQ1+SGmLoS1JDDH3tMTNzPEl6XWZmju/7MEgHNS/Z\n1B5J6P+78TKRW82lg5WXbEqSxmboS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi\n6EtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+\nJDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtS\nQwx9SWqIoS9JDTH0Jakhhr4kNWTR0E9yaZL5JLcNtW1Mcl+SW7rl1KFtFyW5K8n2JC9bqcIlSY/d\nOCP9TwMvH9H+oar6lW65GiDJScCZwEnAacDHk2Ri1UqSlmXR0K+qrwEPjNg0Ksw3AJ+rqkeq6m7g\nLuDkZVUoSZqY5czpn5/k1iSfSrK2azsauHeoz/1dmyRpCqxZ4uM+Dry3qirJ+4C/BM5l9Oi/FnqS\nTZs27VmfnZ1ldnZ2ieVI0sFpbm6Oubm5iT1fqhbM5J92So4DvlRVz9vftiQXAlVVF3fbrgY2VtWN\nIx5X4+xbq2fw8Uvf5yT4eyEtLAlVteTPSsed3glDo/gkM0PbXg18s1u/Cvj9JE9I8gzgmcBNSy1O\nkjRZi07vJLkCmAWOTHIPsBH47STrgV3A3cB5AFV1e5IrgduBncCbHM5L0vQYa3pnRXbs9M7UcXpH\nmn6rNb0jSToIGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLo\nS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4k\nNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JD\nDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDTH0Jakhi4Z+kkuT\nzCe5bajtiCRbk9yZ5Joka4e2fSTJXU
luTbJ+pQqXJD1244z0Pw28fJ+2C4Frq+rZwHXARQBJTgNO\nqKoTgfOASyZYqyRpmRYN/ar6GvDAPs0bgM3d+ubu593tl3ePuxFYm2TdZEqVJC3XUuf0j6qqeYCq\n2gEc1bUfDdw71O/+rk2SNAXWTPj5MqKtFuq8adOmPeuzs7PMzs5OuBxJOrDNzc0xNzc3sedL1YKZ\n/NNOyXHAl6rqed3P24HZqppPMgNcX1UnJbmkW/981+8O4EW73xXs85w1zr61epKwn7/Rq1UF/l5I\nC0tCVY0aYI9l3OmdsPco/irg7G79bGDLUPtru8JOAR4cFfiSpH4sOtJPcgUwCxwJzAMbgX8AvgAc\nC9wDnFFVD3b9PwacCvwQeH1V3bLA8zrSnzKO9KXpt9yR/ljTOyvB0J8+hr40/VZrekeSdBAw9CWp\nIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi\n6EtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+\nJDXE0Jekhhj6ktQQQ1+SGmLoSyPMzBxPkl6XmZnj+z4MOgilqvrZcVJ97VujJQH6PidhGn4vPBaa\nVkmoqiz18Y70Jakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jek\nhhj6ktQQQ1/S1JuGbz09WL751G/Z1B5+s+RQFR6LqTId5wOm4Zz4LZuSpLGtWc6Dk9wNPATsAnZW\n1clJjgA+DxwH3A2cWVUPLbNOSdIELHekvwuYrarnV9XJXduFwLVV9WzgOuCiZe5DkjQhyw39jHiO\nDcDmbn0z8LvL3IckaUKWG/oFXJPk5iTndm3rqmoeoKp2AE9b5j4kSROyrDl94DeqakeSpwFbk9zJ\nY/iIfdOmTXvWZ2dnmZ2dXWY5knRwmZubY25ubmLPN7FLNpNsBB4GzmUwzz+fZAa4vqpOGtHfSzan\nzHRcFtf/JXHgsZg203E+YBrOSW+XbCY5NMlh3fqTgJcB24CrgLO7bq8Dtix1H5KkyVrO9M464O+T\nVPc8n6mqrUn+DbgyyTnAPcAZE6hTkjQB3pGrPabjLXT/b5/BYzFtpuN8wDScE+/IlSSNzdCXpIYY\n+pLUEENfkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEv\nSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLU\nEENfkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x\n9CWpIYa+JDVkTd8FSJpuMzPHMz//3b7L0ISkqvrZcVJ97VujJQH6PidhGn4vPBZDVUzJsei/BpiG\nc5KEqspSH9/rSP+Tn/xkn7vnyU9+MmeddVb3Sy1JB79eR/qHHnpuL/ve7dFHv8jNN3+V5z73ub3W\nMS2mZUTX90gKPBZ7VTElx6L/GmAazskBPdL/0Y/6HekffviNvZ9ASVpNK3b1TpJTk9yR5FtJ3rlS\n+5EkjW9FQj/JIcDHgJcDvwS8JslzVmJfK21ubq7vEsZyoNQJc30XcNDwnE/aXN8FrIqVGumfDNxV\nVd+tqp3A54ANK7SvFXWgvLAOlDpbeWGtBs/5pM31XcCqWKnQPxq4d+jn+7q2qfPiF59OkgWX97zn\nPfvdPollZub4vg+DpEas1Ae5oz5Z/plPTA8//BUrtPvx/PjH3+EHP3iY/V8VsKlbVs78vJeMSgeG\nJx7wl3ivyCWbSU4BNlXVqd3PFwJVVRcP9fGyGUlaguVcsrlSof844E7gJcB/ATcBr6mq7RPfmSRp\nbC
syvVNVjyZ5M7CVwecGlxr4ktS/3u7IlSStvlX5auUkFyTZ1i1v7do+kGR7kluTfDHJ4atRyxLq\nfG+Sf0/yjSRXJ5mZxjqHtv1xkl1JntpXfUO1jDqeG5Pcl+SWbjl1yuq8YKj9Ld0NhtuSvL/PGrt6\nRh3Pzw0dy+8kuWVK6/zlJP/avY5uSvKrU1rjv3Sv9y1JDuuptkuTzCe5bajtiCRbk9yZ5Joka4e2\nfSTJXV2Wrl90B1W1oguDm7NuA54IPI7BlM8JwEuBQ7o+7wf+YqVrWWKdhw31eQvwiSmr8yvACd22\nY4Crge8AT52yOrcCzwQ2Am/vs7Yxz/tst76m6/dzU1bnnvM+1OeDwLunrM7d5/0a4GVdn9OA66ew\nxpuAF3R9zgbe21N9LwDWA7cNtV0MvKNbfyfw/qFj+Y/d+q8BNyz2/Ksx0j+pK+QnVfUo8M/Aq6rq\n2qra1fW5gUFg9WmhOh8e6vMkYNfIR6+efev8KvCqbttfAX/SW2V7G3k8u23TdM3bqDpfDbyRwQvr\nEYCq+u8ea4T9n/fdzgQ+u+qV7W2h874L2D06fQpwf0/1wcI1Pquqvtb1uRb4vT6K62p4YJ/mDcDm\nbn0zP73ZdQNwefe4G4G1Sdbt7/lXI/S/Cbywe3tyKHA6cOw+fc4B/mkVatmfBetM8r4k9wB/APxZ\njzXCAnUmeQVwX1Vt67e8PUbVeQyDmyLO796Kfmr4bWpPFjrvJ3btNyS5vu/pCBZ5HSX5LWBHVf1H\nXwV2FjrvbwM+2L2OPgBcNGU1Hgt8M8kruz5n0v9AdNhRVTUPUFU7gKO69n1vhL2fRW6EXfHQr6o7\nGLw1uRb4MnAr8Mju7UneBeysqitWupb92V+dVfXuqno68BkGUzy9WaDOR4F3sfcfpF5H0/s5np9g\nMC2xHtgBfKi3ItlvnY8HnlJVpwDvAK7srUgWfx0Br6H/Uf7+6nwjcEH3OnobcNmU1biTweDz/CQ3\nM3hX/3991fgYjHUj7N5bV3++6s+BP+rWXwd8HXhiH3Nn49Y51PZ0YFvftY2o8y0MAvQ/Gczn7wTu\nZjA66L3G/RzP4xiat5yGZXedDMLghUPt3waO7Lu+UceTwbz0DuDn+65rgTrfCDywT/tDfdc26lgO\ntZ3IGPPjK1jTXq8NYDuwrlufAbZ365cAZw31u2N3v4WW1bp652ndv09nMHf22e6qjXcAr6yqn6xG\nHYtZoM5nDnXZwODg92pEnZdX1UxV/UJVPYPBdx09v6q+P2V1fnafq59ezeCtdq9G1QlsYXBzIUme\nBTy+qv6ntyJZsE6A32EQAt/rq7ZhI+q8Avhekhd17S8BvtVfhQv+bu5uOwR4N4NA7UvYexR/FYMP\nl+n+3TLU/lrY800ID1Y3DbSQ1fqfqHyxu4RwJ/CmqnooyUeBJwBfyeC7LG6oqjetUj0LGVXnpd2L\nfhfwXQajwL79TJ37bC+m48PSkee9u6xsF4N3I+f1WWBnVJ2XAZcl2Qb8hO6F1bOFzvtZTMHUzpBR\nx/MNwIczuFv/f4E39Frh6BrfmuR8Bq+fv6uqv+6jsCRXMLh67MjuM5CNDK5w/EKSc4B7gDMAqurL\nSU5P8m3gh8DrF33+7i2BJKkBqzK9I0maDoa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kN\n+X9omfOGh+KJ5AAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x7f1150204550>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Histogram of the non-empty 'quality' values in Gleb's validation stats.\n",
    "# A generator expression is used so no loop variable is left behind in the notebook namespace.\n",
    "plt.hist(list(s['quality'] for s in glebs_val_stats.values() if s['quality']))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### esato"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "14a6258fc3a84de289267c21955bffbe",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "A Jupyter Widget"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python2.7/dist-packages/PIL/TiffImagePlugin.py:569: UserWarning: Metadata Warning, tag 256 had too many entries: 2, expected 1\n",
      "  tag, len(values)))\n",
      "/usr/local/lib/python2.7/dist-packages/PIL/TiffImagePlugin.py:569: UserWarning: Metadata Warning, tag 257 had too many entries: 2, expected 1\n",
      "  tag, len(values)))\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "# Compute the stats mapping for esato_data (make_stats is defined outside this excerpt).\n",
    "# Later cells read the 'quality' and 'res_valid' fields of each entry.\n",
    "esato_stats = make_stats(esato_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Save the esato stats as JSON; indent=4 and sort_keys=True keep the file readable and its key order fixed.\n",
    "with open('../data/esato_stats.json', 'w') as f:\n",
    "    json.dump(esato_stats, f, indent=4, sort_keys=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1519"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of entries in the esato stats mapping.\n",
    "len(esato_stats)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "80"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Lowest recorded quality in the esato stats.\n",
    "# Entries with a missing (falsy) quality are skipped, as in the Gleb's-val quality cell:\n",
    "# on Python 2 a single None would otherwise compare below every int and become the minimum.\n",
    "min(s['quality'] for s in esato_stats.values() if s['quality'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "175"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Total of the 'res_valid' field over all entries (a count, if the field is a boolean flag - TODO confirm).\n",
    "sum(s['res_valid'] for s in esato_stats.values())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([  1.00000000e+00,   0.00000000e+00,   3.00000000e+00,\n",
       "          0.00000000e+00,   8.00000000e+00,   4.30000000e+01,\n",
       "          5.40000000e+01,   4.60000000e+01,   1.03100000e+03,\n",
       "          3.33000000e+02]),\n",
       " array([  80.,   82.,   84.,   86.,   88.,   90.,   92.,   94.,   96.,\n",
       "          98.,  100.]),\n",
       " <a list of 10 Patch objects>)"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAEACAYAAABRQBpkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAEUZJREFUeJzt3X+s3XV9x/HnCyr+Xiloe2MLdiJDJNOKU6tbxpks0uJi\n2SILGCYgGhbxxzTbLC5ZL9kyhwnKCNnIJiFgFAaySZcQ6Eg5ISaCM7VBoUAXI21BrnNQk7GEFHjv\nj/MtO72e9l7uOdzvubfPR3LS7/fz/XHe99tvv6/v53PO9zZVhSTp8HZE2wVIktpnGEiSDANJkmEg\nScIwkCRhGEiSmEUYJLk2yVSS+/vavpxkR5LtSW5N8it9yy5NsrNZ/oG+9nVJHkrySJIvjP5HkSTN\n1Wx6BtcBZ0xr2wKcUlVrgJ3ApQBJ3gr8IXAysB74+/QcAVzd7OcU4NwkbxnNjyBJGtaMYVBV3wGe\nmtZ2V1U938zeC6xqpj8E3FRVz1bVT+gFxbub186qerSq9gE3ARtG8yNIkoY1is8MPgbc3kyvBHb3\nLXusaZvevqdpkySNgaHCIMlfAPuq6sb9TQNWq0O0S5LGwJK5bpjkfOBM4P19zXuA4/rmVwGP0wuD\n4we0D9qvISFJc1BVg268Z2W2PYPQd3efZB3w58CHquqZvvU2A+ckOSrJrwJvBr4H/Afw5iRvTHIU\ncE6z7kBV5WtEr02bNrVew2J6eTw9nuP6GtaMPYMk3wQ6wLFJdgGbgC8CRwH/ngTg3qr6ZFU9mORm\n4EFgH/DJ6lX5XJJP0fsW0hHAtVW1Y+jqJUkjMWMYVNVHBjRfd4j1vwR8aUD7HcBJL6o6SdK88Ank\nRa7T6bRdwqLi8Rwtj+f4yCjGmkYpSY1bTZI07pJQ8/ABsiRpETMMJEmGgSTJMJAkYRhIkjAMJEkY\nBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiS\nMAwkSRgGkiQMA0kShoEkCcNA0gI2MbGaJK2/JiZWt30ohjZjGCS5NslUkvv72pYl2ZLk4SR3Jlna\nt+yqJDuTbE+ypq/9/CSPNNt8dPQ/iqTDzdTUo0C1/urVsbDNpmdwHXDGtLaNwF1VdRKwFbgUIMl6\n4ISqOhG4GLimaV8G/CXwLuA9wKb+AJEktWvGMKiq7wBPTWveAFzfTF/fzO9vv6HZ7j5gaZIV9MJk\nS1X9oqr2AluAdcOXL0kahbl+ZrC8qqYAquoJYHnTvhLY3bfenqZtevtjTZskaQyM+gPkDJivAe00\n7ZKkMbBkjttNJVlRVVNJJoCfNe17gOP61lsFPN60d6a1332wnU9OTr4w3el06HQ6B1tVkg5L3W6X\nbrc7sv2lauYb9CSrgX+rql9v5i8Hnqyqy5NsBI6uqo1JzgQuqaoPJlkLXFlVa5sPkL8PnEqvN/J9\n4J3N5wfT36tmU5MkJfsHH9oW2r5uJaGqBo3CzMqMPYMk36R3V39skl3AJuBvgVuSfAzYBZwNUFW3\nJzkzyX8CTwMXNu1PJfkreiFQwGWDgkCS1I5Z9Qzmkz0DSbNlz6CvgiF7Bj6BLEkyDCRJhoEkCcNA\nkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKG\ngSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEliyDBI8rkkP0pyf5JvJDkq\nyeok9yZ5OMmNSZY06x6V5KYkO5N8N8nxo/kRJEnDmnMYJHkD8Gng1Kp6G7AEOBe4HLiiqk4C9gIX\nNZtcBDxZVScCVwJfHqZwSdLoDDtMdCTw6ubu/5XA48DvALc2y68HzmqmNzTzAN8CTh/yvSVJIzLn\nMKiqx4ErgF3AY8AvgG3A3qp6vlltD7
CymV4J7G62fQ7Ym+SYub6/JGl0lsx1wyRH07vbfyO9ILgF\nWD9g1dq/yfRd9C07wOTk5AvTnU6HTqcz1zIlaVHqdrt0u92R7S9VA6/HM2+YfBg4o6o+0cz/EfBe\n4MPARFU9n2QtsKmq1ie5o5m+L8mRwE+ravmA/dZca5J0eEkOek85z0Lb160kVNX0m+5ZG+Yzg13A\n2iSvSO9v5HTgAeBu4OxmnfOB25rpzc08zfKtQ7y3JGmE5twzAEiyCTgH2Af8APg4sAq4CVjWtJ1X\nVfuSvBz4OvAO4L+Bc6rqJwP2ac9A0qzYM+irYMiewVBh8FIwDCTNlmHQV0GLw0SSpEXCMJAkGQaS\nJMNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwD\nSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAksSQYZBkaZJbkuxI8kCS9yRZ\nlmRLkoeT3Jlkad/6VyXZmWR7kjXDly9JGoVhewZ/B9xeVScDbwceAjYCd1XVScBW4FKAJOuBE6rq\nROBi4Joh31uSNCKpqrltmLwW2F5VJ0xrfwg4raqmkkwAd1fVyUmuaab/uVlvB9Cpqqlp29dca5J0\neEkCjMP1IrR93UpCVWWu2w/TM3gT8PMk1yXZluQfk7wKWLH/Al9VTwDLm/VXArv7tn+saZMktWzJ\nkNueClxSVd9P8lV6Q0QHi8dBiTVw3cnJyRemO50OnU5niDIlafHpdrt0u92R7W+YYaIVwHer6k3N\n/G/RC4MTaIZ/ZhgmemE4adp+HSaSNCsOE/VV0NYwUXMR353k15qm04EHgM3ABU3bBcBtzfRm4KMA\nSdYCe6cHgSSpHXPuGQAkeTvwNeBlwI+BC4EjgZuB44BdwNlVtbdZ/2pgHfA0cGFVbRuwT3sGkmbF\nnkFfBUP2DIYKg5eCYSBptgyDvgpa/DaRJGmRMAwkSYaBJMkwkCRhGEiSMAwkSRgGkiQMA0kShoEk\nCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwD\nSRKGgSQJw0CShGEgScIwkCQxgjBIckSSbUk2N/Ork9yb5OEkNyZZ0rQfleSmJDuTfDfJ8cO+tyRp\nNEbRM/gs8GDf/OXAFVV1ErAXuKhpvwh4sqpOBK4EvjyC95YkjcBQYZBkFXAm8LW+5vcDtzbT1wNn\nNdMbmnmAbwGnD/PekqTRGbZn8FXgz4ACSHIs8FRVPd8s3wOsbKZXArsBquo5YG+SY4Z8f0nSCCyZ\n64ZJPghMVdX2JJ39zc2rX/UtO2AXfcsOMDk5+cJ0p9Oh0+kMWk2SDlvdbpdutzuy/aVq4PV45g2T\nvwHOA54FXgm8Fvg28AFgoqqeT7IW2FRV65Pc0Uzfl+RI4KdVtXzAfmuuNUk6vCQHvaecZ6Ht61YS\nqmr6TfeszXmYqKq+WFXHV9WbgHOArVV1HnA3cHaz2vnAbc305maeZvnWub63JGm0XornDDYCn0/y\nCHAMcG3Tfi3wuiQ7gT9p1pMkjYE5DxO9VBwmkjRbDhP1VdDWMJEkafEwDCRJhoEkyTCQJGEYSJIw\nDCRJGAaSJAwDSRJD/KI6SdJ+L28egFu4DANJGtoztP8k9HBh5DCRJMkwkCQZBpIkDANJEoaBJAnD\nQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSQwRBklWJdma\n5MEkP0zymaZ9WZItSR5OcmeSpX3bXJVkZ5LtSdaM4geQJA1vmJ7Bs8Dnq+qtwHuBS5K8BdgI3FVV\nJwFbgUsBkqwHTqiqE4GLgWuGqlySNDJzDoOqeqKqtjfT/wPsAFYBG4Drm9Wub+Zp/ryhWf8+YGmS\nFX
N9f0nS6IzkM4Mkq4E1wL3Aiqqagl5gAMub1VYCu/s2e6xpkyS1bOgwSPIa4FvAZ5seQh1s1QFt\nB1tXkjSPlgyzcZIl9ILg61V1W9M8lWRFVU0lmQB+1rTvAY7r23wV8Pig/U5OTr4w3el06HQ6w5Qp\nSYtQt3mNRqrmfnOe5Abg51X1+b62y4Enq+ryJBuBo6tqY5IzgUuq6oNJ1gJXVtXaAfusYWqSdPhI\nwngMMIxDHaGqBo3AzG7ruV54k/wmcA/wQ3pHoYAvAt8DbqbXC9gFnF1Ve5ttrgbWAU8DF1bVtgH7\nNQwkzYphcGANrYTBS8UwkDRbhsGBNQwTBj6BLEkyDCRJhoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkY\nBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiS\nMAykBWdiYjVJWn9NTKxu+1BohFJVbddwgCQ1bjVJ4yQJMA7/Rl4BPNN2EYzHsRiHv5NQVZnr1ktG\nWYq02E1MrGZq6tG2yxgTzzAOF0CNhj0D6UUYj7vycagBxqOOcagBxqOO4XoGfmYgSZr/MEiyLslD\nSR5J8oX5fn9J0i+b1zBIcgRwNXAGcApwbpK3zGcNh5tut9t2CdIhdNsuQI357hm8G9hZVY9W1T7g\nJmDDPNdwWDEMNN66bRegxnyHwUpgd9/8nqZNOqRx+W69tFjNdxgM+tf0Sx/Bt/0PPgn33HPPPByO\nQxvFBfCyyy5bFA8X9b7OWWPwkhan+X7OYA9wfN/8KuDxea5hVk477bS2SxgbU1OPjsld8TjUAONR\nxzjUAKOp47IxqGEUxqWOuZnX5wySHAk8DJwO/BT4HnBuVe2YtyIkSb9kXnsGVfVckk8BW+gNUV1r\nEEhS+8buCWRJ0vxr/QnkJJ9L8qMk9yf5RpKjkqxOcm+Sh5PcmMTfoTQLA47ly5Ncl+THSX6QZFuS\nt7Vd50KR5LNJfti8PtO0LUuypTk370yytO06F4qDHM9NSfY05+a2JOvarnNcJbk2yVSS+/vaDno+\nJrkqyc4k25OsmWn/rYZBkjcAnwZOraq30Ru2Ohe4HLiiqk4C9gIXtVflwnCQY3kOva/A/GlVvaOq\nTq2q+w+1H/UkOYXeefcbwBrg95K8GdgI3NWcm1uBS9urcuE4xPEE+Epzbp5aVXe0VuT4u47eA7v9\nBp6PSdYDJ1TVicDFwDUz7bz1ngFwJPDq5u7/lfS+XfQ7wK3N8uuB32+ptoWm/1i+CniM3lccFvbX\nHNpxMnBvVT1TVc8B99A7Dz9E75yk+fOslupbaA52PMHzc1aq6jvAU9OaN3Dg+bihr/2GZrv7gKVJ\nVhxq/62GQVU9DlwB7KJ34foFsA3YW1XPN6vtAd7QToULx4Bjubeq7moW/3XTVbwiyctaK3Jh+RHw\n2003/FXAmcBxwIqqmgKoqieA17dY40Iy6HiuotdzvaQ5P7/msNuLtnza+bi8aZ/+gO9jzPCAb9vD\nREfTS7A30rvgvxpYP2BVP+WewYBj+ZokHwE2VtXJwLuAYwF/OeAsVNVD9IYr7wJuB7YDz7Za1AJ2\niOP5D/SGM9YATwBfaa3IxWVWD/j2a3uY6HeBH1fVk03X8V+B9wFHp/dL7WCMH0wbM9OP5b8A7+u7\na9hHb8zx3S3WuKBU1XVV9c6q6tDrnj8CTO3vbieZAH7WYokLyoDjubOq/qvvPzD5J3o3LZq9g52P\ne+j1ZPeb8TradhjsAtYmeUV6j7ieDjwA3A2c3axzPnBbS/UtJIOO5Y7mBKFpO4ted12zkOT1zZ/H\n0xvfvhHYDFzQrOK5+SIMOp77z8/GH+D5OZPpnwH2n48X8P/n42bgowBJ1tIbNp465I7bfs4gySZ6\n33rZB/wA+Di9FLsJWNa0ndfc2eoQph3LbcAngDuA19E7gbYDf1xV
/9takQtIknuAY+gdz89VVTfJ\nMcDN9O66dgFnV9XeFstcMA5yPG+g9+2i54GfABfPdNE6XCX5JtChN9w7BWwCvg3cwoDzMcnVwDrg\naeDCqtp2yP23HQaSpPa1PUwkSRoDhoEkyTCQJBkGkiQMA0kShoEkCcNAkoRhIEkC/g+ZUmwEBTaV\n3AAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x7f11501cff90>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Histogram of the non-empty 'quality' values in the esato stats.\n",
    "# A generator expression is used so no loop variable is left behind in the notebook namespace.\n",
    "plt.hist(list(s['quality'] for s in esato_stats.values() if s['quality']))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### NEW: artgor data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d74ef1f4caef41d0858b56aade316111",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "A Jupyter Widget"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "# Compute the stats mapping for artgor_data (make_stats is defined outside this excerpt).\n",
    "# Later cells read the 'quality' and 'res_valid' fields of each entry.\n",
    "artgor_stats = make_stats(artgor_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Save the artgor stats as JSON; indent=4 and sort_keys=True keep the file readable and its key order fixed.\n",
    "with open('../data/artgor_stats.json', 'w') as f:\n",
    "    json.dump(artgor_stats, f, indent=4, sort_keys=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "533"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of entries in the artgor stats mapping.\n",
    "len(artgor_stats)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "90"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Lowest recorded quality in the artgor stats.\n",
    "# Entries with a missing (falsy) quality are skipped, as in the Gleb's-val quality cell:\n",
    "# on Python 2 a single None would otherwise compare below every int and become the minimum.\n",
    "min(s['quality'] for s in artgor_stats.values() if s['quality'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Total of the 'res_valid' field over all entries (a count, if the field is a boolean flag - TODO confirm).\n",
    "sum(s['res_valid'] for s in artgor_stats.values())"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  },
  "latex_envs": {
   "LaTeX_envs_menu_present": true,
   "autocomplete": true,
   "bibliofile": "biblio.bib",
   "cite_by": "apalike",
   "current_citInitial": 1,
   "eqLabelWithNumbers": true,
   "eqNumInitial": 1,
   "hotkeys": {
    "equation": "Ctrl-E",
    "itemize": "Ctrl-I"
   },
   "labels_anchors": false,
   "latex_user_defs": false,
   "report_style_numbering": false,
   "user_envs_cfg": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
